R Programming

Set Chunk requirements

knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

loading Relevant packages and Data Set

#Import relevant packages

## tidyverse includes readr, ggplot2, dplyr, forcats, tibble, tidyr, purrr, stringr
library(tidyverse) 
library(readxl)
library(janitor)
library(lubridate)
library(scales)
library(plotly)


## loading the csv data set
setwd('F:\\Documents\\Reinp\\GitHub Respositories\\Nairobi-Securities-Exchange-Trading-Summary')

NSE_Stock_prices_2016_2020 <- read_excel("data/NSE Stock prices 2017 - 2020.xlsx",
                              sheet = "NSE Stock data 2017 - 2020")


NSE_Stock_prices_2021_2025 <- read_excel("data/NSE Stock prices 2021 - 2025.xlsx",
                              sheet = "NSE Stock data 2021 - 2025")


NSE_unique <- read_excel("data/NSE Stock prices 2021 - 2025.xlsx",
                              sheet = "Unique")


#combine the data frames by rows


NSE_Stock_prices_raw <- rbind(NSE_Stock_prices_2016_2020, NSE_Stock_prices_2021_2025)
NSE_Stock_prices <- merge(NSE_unique %>%
                                      select(-4, -7)
                     , NSE_Stock_prices_raw %>%
                        select(-4, -11, -12) ,
                     by.x = "CODE", 
                     by.y = "CODE") #InnerJoin

NSE_Stock_prices <- NSE_Stock_prices%>%
  janitor:: clean_names()%>%
  mutate(date = ymd(date))%>%
  arrange(date, sector, name)



View(NSE_Stock_prices)

Structure of the Data

head(NSE_Stock_prices)

tail(NSE_Stock_prices)

# How many variables and observations are there?
ncol(NSE_Stock_prices)

nrow(NSE_Stock_prices)

#learn more about the dataset

class(NSE_Stock_prices)
typeof(NSE_Stock_prices) 
length(NSE_Stock_prices)

Cleaning Data

Missing Values

# check number of missing values in our data
sapply(NSE_Stock_prices,function(x) sum(is.na(x)))
##                                                  code 
##                                                     0 
##                                       securities_name 
##                                                     0 
##                                 security_type_general 
##                                                     0 
##                                                sector 
##                                                     0 
##  isin_code_intentional_security_identification_number 
##                                                  7292 
##                                                  date 
##                                                     0 
##                                                  name 
##                                                     0 
##                                    last_12_months_low 
##                                                     0 
##                                   last_12_months_high 
##                                                     0 
##                                      days_trading_low 
##                                                     0 
##                                     days_trading_high 
##                                                     0 
##                               days_trading_vwap_price 
##                                                     0 
##                              previous_days_vwap_price 
##                                                     2 
##                               volume_of_shares_traded 
##                                                     0 
## vwap_adjusted_price_for_bonus_issues_and_share_splits 
##                                                 48707

Columns

NSE_Stock_prices <- NSE_Stock_prices%>% 
  mutate(volume_of_shares_traded = 
      ifelse( volume_of_shares_traded == "-" | is.na(volume_of_shares_traded),"0",
              volume_of_shares_traded))%>%
  mutate(volume_of_shares_traded = as.numeric(volume_of_shares_traded))

str(NSE_Stock_prices$volume_of_shares_traded)
##  num [1:81613] 3500 200 0 100 4900 ...

Adding Columns

NSE_Stock_prices1 <- NSE_Stock_prices%>%
  mutate(turn_over = days_trading_vwap_price * volume_of_shares_traded)%>%
  mutate(change = days_trading_vwap_price - previous_days_vwap_price )%>%
  mutate(percentage_change = round((change/previous_days_vwap_price)*100,3) )%>%
  mutate(day_month = day(date))%>%
                       mutate(month_name = month(date, label = TRUE))%>%
                       mutate(year = factor(year(date), ordered = TRUE))%>%
                       mutate(day_year = factor(yday(date), ordered = TRUE))%>% #day of year
                       mutate(week_year = factor(week(date), ordered = TRUE))%>% #week of year
                       mutate(week_year_date = ceiling_date(date, unit = "week"))%>% 
                       mutate(month_year_date = floor_date(date, unit = "month"))

View(NSE_Stock_prices1)

names(NSE_Stock_prices1) #display variable names  
##  [1] "code"                                                 
##  [2] "securities_name"                                      
##  [3] "security_type_general"                                
##  [4] "sector"                                               
##  [5] "isin_code_intentional_security_identification_number" 
##  [6] "date"                                                 
##  [7] "name"                                                 
##  [8] "last_12_months_low"                                   
##  [9] "last_12_months_high"                                  
## [10] "days_trading_low"                                     
## [11] "days_trading_high"                                    
## [12] "days_trading_vwap_price"                              
## [13] "previous_days_vwap_price"                             
## [14] "volume_of_shares_traded"                              
## [15] "vwap_adjusted_price_for_bonus_issues_and_share_splits"
## [16] "turn_over"                                            
## [17] "change"                                               
## [18] "percentage_change"                                    
## [19] "day_month"                                            
## [20] "month_name"                                           
## [21] "year"                                                 
## [22] "day_year"                                             
## [23] "week_year"                                            
## [24] "week_year_date"                                       
## [25] "month_year_date"
str(NSE_Stock_prices1)
## 'data.frame':    81613 obs. of  25 variables:
##  $ code                                                 : chr  "EGAD" "KUKZ" "KAPC" "LIMT" ...
##  $ securities_name                                      : chr  "Eaagads Ltd Ord 1.25 (AIMS)" "Kakuzi Plc Ord.5.00" "Kapchorua Tea Co. Ltd Ord Ord 5.00(AIMS)" "The Limuru Tea Co. Ltd Ord 20.00(AIMS)" ...
##  $ security_type_general                                : chr  "Shares" "Shares" "Shares" "Shares" ...
##  $ sector                                               : chr  "Agricultural" "Agricultural" "Agricultural" "Agricultural" ...
##  $ isin_code_intentional_security_identification_number : chr  "KE0000000208" "KE0000000281" "KE4000001760" "KE0000000356" ...
##  $ date                                                 : Date, format: "2017-01-03" "2017-01-03" ...
##  $ name                                                 : chr  "Eaagads Ltd" "Kakuzi Plc" "Kapchorua Tea Kenya Plc" "Limuru Tea Plc" ...
##  $ last_12_months_low                                   : chr  "10" "351" "68.5" "280" ...
##  $ last_12_months_high                                  : chr  "15" "405" "101" "400" ...
##  $ days_trading_low                                     : num  27.2 280 80 530 19.2 ...
##  $ days_trading_high                                    : num  28 280 80 530 21 178 27 8.95 13.5 118 ...
##  $ days_trading_vwap_price                              : num  27.8 280 80 530 19.9 ...
##  $ previous_days_vwap_price                             : num  27.2 309 80 530 19.2 ...
##  $ volume_of_shares_traded                              : num  3500 200 0 100 4900 ...
##  $ vwap_adjusted_price_for_bonus_issues_and_share_splits: chr  "-" "-" "-" "-" ...
##  $ turn_over                                            : num  97125 56000 0 53000 97510 ...
##  $ change                                               : num  0.5 -29 0 0 0.7 ...
##  $ percentage_change                                    : num  1.83 -9.38 0 0 3.65 ...
##  $ day_month                                            : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ month_name                                           : Ord.factor w/ 12 levels "Jan"<"Feb"<"Mar"<..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ year                                                 : Ord.factor w/ 5 levels "2017"<"2018"<..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ day_year                                             : Ord.factor w/ 362 levels "2"<"3"<"4"<"5"<..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ week_year                                            : Ord.factor w/ 53 levels "1"<"2"<"3"<"4"<..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ week_year_date                                       : Date, format: "2017-01-08" "2017-01-08" ...
##  $ month_year_date                                      : Date, format: "2017-01-01" "2017-01-01" ...

ggplot

total_traded_daily <- NSE_Stock_prices1%>%
 group_by(date, security_type_general)%>%
  summarise( total_volume_traded = sum(volume_of_shares_traded),
            total_turnover = sum(turn_over), .groups = 'drop')%>%
             drop_na()

total_traded_weekly <- NSE_Stock_prices1%>%
 group_by(week_year_date, security_type_general)%>%
  summarise( total_volume_traded = sum(volume_of_shares_traded),
            total_turnover = sum(turn_over), .groups = 'drop')%>%
             drop_na()

total_traded_monthly <- NSE_Stock_prices1%>%
 group_by(month_year_date, security_type_general)%>%
  summarise( total_volume_traded = sum(volume_of_shares_traded),
            total_turnover = sum(turn_over), .groups = 'drop')%>%
             drop_na()

total_traded_yearly <- NSE_Stock_prices1%>%
 group_by(year, security_type_general)%>%
  summarise( total_volume_traded = sum(volume_of_shares_traded),
            total_turnover = sum(turn_over), .groups = 'drop')%>%
             drop_na()
ggplot(data=total_traded_daily%>%
         filter(security_type_general != "Index")%>%
         arrange( security_type_general, date)) +
  geom_line(aes(x=date, y=total_volume_traded, colour=security_type_general))+
  scale_x_date(date_labels = "%d-%b-%y", date_breaks = "60 days")+ 
  scale_y_continuous(labels = scales::comma, n.breaks = 10) +
  theme(axis.text.x = element_text(angle = 90, size = 8, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        legend.position="bottom")+
  labs(title="Daily volume traded ", x="day_date", y ="Total volume")+
  guides(col = guide_legend(ncol = 2))

ggplot(data=total_traded_daily%>%
         filter(security_type_general != "Index")%>%
         arrange( security_type_general, date)) +
  geom_line(aes(x=date, y=total_turnover, colour=security_type_general))+
  scale_x_date(date_labels = "%d-%b-%y", date_breaks = "60 days") +
  scale_y_continuous(labels = scales::comma, n.breaks = 10) +
  theme(axis.text.x = element_text(angle = 90, size = 8, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        legend.position="bottom")+
  labs(title="Daily turnover traded ", x="day_date", y ="Total turnover (KES)")+
  guides(col = guide_legend(ncol = 2))

ggplot(data=total_traded_weekly%>% 
         filter(security_type_general != "Index")%>%
         arrange( security_type_general, week_year_date)) +
  geom_line(aes(x=week_year_date, y=total_volume_traded, colour=security_type_general))+
  scale_x_date(date_labels = "%d-%b-%y", date_breaks = "8 weeks") +
  scale_y_continuous(labels = scales::comma, n.breaks = 12) +
  theme(axis.text.x = element_text(angle = 90, size = 8, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        legend.position="bottom")+
  labs(title="Weekly volume traded ", x="week_date", y ="Total volume")+
  guides(col = guide_legend(ncol = 2))

ggplot(data=total_traded_weekly%>% 
         filter(security_type_general != "Index")%>%
         arrange( security_type_general, week_year_date)) +
  geom_line(aes(x=week_year_date, y=total_turnover, colour=security_type_general))+
  scale_x_date(date_labels = "%d-%b-%y", date_breaks = "8 weeks") +
  scale_y_continuous(labels = scales::comma, n.breaks = 12) +
  theme(axis.text.x = element_text(angle = 90, size = 8, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        legend.position="bottom")+
  labs(title="Weekly turnover traded ", x="week_date", y ="Total turnover (KES)")+
  guides(col = guide_legend(ncol = 2))

Forecasting

library(forecast)

Eaagads <- NSE_Stock_prices1%>%
  select(date, name, days_trading_vwap_price, volume_of_shares_traded, year)%>%
                       mutate(month_num = month(date, label = FALSE))%>%
  filter(name == "Eaagads Ltd")%>%
  mutate(date_numeric = as.numeric(date))


par(mfrow = c(1,2))
acf(as.ts(Eaagads$days_trading_vwap_price), main = "")
pacf(as.ts(Eaagads$days_trading_vwap_price), main = "")

arima1 <- auto.arima(as.ts(Eaagads$days_trading_vwap_price)
                     )
arima1
## Series: as.ts(Eaagads$days_trading_vwap_price) 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.9306  -0.9656
## s.e.  0.0255   0.0175
## 
## sigma^2 estimated as 0.2589:  log likelihood=-851.54
## AIC=1709.08   AICc=1709.1   BIC=1724.22
arima2 <- auto.arima(as.ts(Eaagads$days_trading_vwap_price)
                     ,xreg = cbind(Eaagads$year,
                                   Eaagads$month_num
                                   )
                     )
arima2
## Series: as.ts(Eaagads$days_trading_vwap_price) 
## Regression with ARIMA(1,1,1) errors 
## 
## Coefficients:
##          ar1      ma1   xreg1    xreg2
##       0.9307  -0.9649  0.0961  -0.0370
## s.e.  0.0261   0.0184  0.8146   0.0702
## 
## sigma^2 estimated as 0.2584:  log likelihood=-849.43
## AIC=1708.86   AICc=1708.91   BIC=1734.09
arima3 <- auto.arima(as.ts(Eaagads$days_trading_vwap_price)
                     ,xreg = cbind(Eaagads$year,
                                   Eaagads$month_num,
                                   Eaagads$volume_of_shares_traded
                                   )
                     )
arima3
## Series: as.ts(Eaagads$days_trading_vwap_price) 
## Regression with ARIMA(1,1,2) errors 
## 
## Coefficients:
##          ar1      ma1      ma2    drift   xreg1   xreg2  xreg3
##       0.9329  -0.9568  -0.0140  -0.0148  0.7541  0.0166      0
## s.e.  0.0257   0.0392   0.0316   0.0047  0.8347  0.0721      0
## 
## sigma^2 estimated as 0.2577:  log likelihood=-846.51
## AIC=1709.03   AICc=1709.15   BIC=1749.39

In order to qualitatively assess the goodness of the above arima models, let us compare the predicted values with the observed ones:

df1 <- tibble(observed = Eaagads$days_trading_vwap_price, predicted = as.numeric(arima1$fitted), time = Eaagads$date) %>% 
  mutate(abs_error = abs((observed - predicted)/observed*100))

df2 <- tibble(observed = Eaagads$days_trading_vwap_price, predicted = as.numeric(arima2$fitted), time = Eaagads$date) %>% 
  mutate(abs_error = abs((observed - predicted)/observed*100))

df3 <- tibble(observed = Eaagads$days_trading_vwap_price, predicted = as.numeric(arima3$fitted), time = Eaagads$date) %>% 
  mutate(abs_error = abs((observed - predicted)/observed*100))

ggplot(gather(df1 %>% select(-abs_error), obs_pred, value, -time)%>% 
         mutate(obs_pred = factor(obs_pred, levels = c("predicted", "observed"))), 
       aes(x = time, y = value, col = obs_pred)) +
  geom_line() +
  xlab("") + ylab("") +
   scale_color_manual(values=c("black", "hotpink")) +
 scale_x_date(date_labels = "%d-%b-%y", date_breaks = "60 days") +
  scale_y_continuous( n.breaks = 8) +
  theme_bw() + theme(legend.title = element_blank(),
                     axis.text.x  = element_text(angle=90, vjust=0.5, size = 8, face = "bold"))

ggplot(gather(df2 %>% select(-abs_error), obs_pred, value, -time)%>% 
         mutate(obs_pred = factor(obs_pred, levels = c("predicted", "observed"))), 
       aes(x = time, y = value, col = obs_pred)) +
  geom_line() +
  xlab("") + ylab("") +
   scale_color_manual(values=c("black", "hotpink")) +
 scale_x_date(date_labels = "%d-%b-%y", date_breaks = "60 days") +
  scale_y_continuous( n.breaks = 8) +
  theme_bw() + theme(legend.title = element_blank(),
                     axis.text.x  = element_text(angle=90, vjust=0.5, size = 8, face = "bold"))

ggplot(gather(df3 %>% select(-abs_error), obs_pred, value, -time)%>% 
         mutate(obs_pred = factor(obs_pred, levels = c("predicted", "observed"))), 
       aes(x = time, y = value, col = obs_pred)) +
  geom_line() +
  xlab("") + ylab("") +
   scale_color_manual(values=c("black", "hotpink")) +
 scale_x_date(date_labels = "%d-%b-%y", date_breaks = "60 days") +
  scale_y_continuous( n.breaks = 8) +
  theme_bw() + theme(legend.title = element_blank(),
                     axis.text.x  = element_text(angle=90, vjust=0.5, size = 8, face = "bold"))

#So first we split the dataset, allowing for a varying index:

train_index <- 916
n_total <- nrow(Eaagads)
Eaagads_train1 <- Eaagads[1:(train_index),]
Eaagads_test <- Eaagads[(train_index+1):n_total,]
predicted <- numeric(n_total-train_index)
predicted1 <- numeric(n_total-train_index)
predicted2 <- numeric(n_total-train_index)

#Then we apply a for cycle that iterates model and estimates one day ahead:

for (i in 1:(n_total-train_index)) {
  Eaagads_train <- Eaagads[1:(train_index-1+i),]
  arima_model <- auto.arima(as.ts(Eaagads$days_trading_vwap_price)
                     ,xreg = cbind(Eaagads$year, Eaagads$month_num
                                   )
                     )
  pred <- forecast(arima_model, 1,
                   xreg = cbind(Eaagads$year[i], 
                                                Eaagads$month_num[i]
                                )
                   )
  predicted[i] <- pred[["mean"]]
  predicted1[i] <- pred[["upper"]]
  predicted2[i] <- pred[["lower"]]
  #predicted[i] <- pred$upper
  #predicted[i] <- pred$lower
  #predicted[i] <- pred$fitted
}

df_pred <- tibble(obs = c(Eaagads_train1$days_trading_vwap_price, Eaagads_test$days_trading_vwap_price), 
                  predicted = c(Eaagads_train1$days_trading_vwap_price, predicted), 
                  predicted_lower = c(Eaagads_train1$days_trading_vwap_price, predicted2),
                  predicted_upper= c(Eaagads_train1$days_trading_vwap_price, predicted1),
                  time = Eaagads$date)
ggplot(gather(df_pred, obs_pred, value, -time) %>% 
         mutate(obs_pred = factor(obs_pred, levels = c("predicted_upper", "predicted", "predicted_lower",
                                                       "obs"))), 
       aes(x = time, y = value, col = obs_pred,
           #linetype = obs_pred
           )) +
  geom_line() +
  xlab("") + ylab("") +
  scale_color_manual(values=c("green", "black", "red", "hotpink")) +
  #scale_linetype_manual(values=c(6, 2, 4, 1)) +
  scale_x_date(date_labels = "%d-%b-%y", date_breaks = "60 days") +
  scale_y_continuous( n.breaks = 8) +
  theme_bw() + theme(legend.title = element_blank(),
                     axis.text.x  = element_text(angle=90, vjust=0.5, size = 8, face = "bold"))

When you are conducting an exploratory analysis of time-series data, you’ll need to identify trends while ignoring random fluctuations in your data.

1. Trend Lines - Global smoothers

One of the simplest methods to identify trends is to fit a ordinary least squares regression model to the data. The model most people are familiar with is the linear model, but you can add other polynomial terms for extra flexibility.

Avoid polynomials of degrees larger than three because they are less stable.

ti = 1:length(Eaagads$days_trading_vwap_price)
m1 = lm(Eaagads$days_trading_vwap_price~ti)
m2 = lm(Eaagads$days_trading_vwap_price~ti+I(ti^2))
m3 = lm(Eaagads$days_trading_vwap_price~ti+I(ti^2)+I(ti^3))
 

    plot_ly(x=Eaagads$date, y=Eaagads$days_trading_vwap_price, type="scatter", mode="lines",
            line=list(color=rgb(0.8,0.8,0.8,0.8), width=4), name="VWAP- Eaagads")%>%
  add_lines( x=Eaagads$date, y=predict(m1), line=list(dash="solid", width = 1.5, color=NULL),
             name="Linear")%>%
  add_lines( x=Eaagads$date, y=predict(m2), line=list(dash="solid", width = 1.5, color=NULL),
             name="Quadratic")%>%
  add_lines( x=Eaagads$date, y=predict(m3), line=list(dash="solid", width = 1.5, color=NULL),
             name="Cubic")%>%
  layout(title = "Global smoothers")

2. Trend Lines - Local Smoothers

Running line smoothers

The running line smoother reduces the bias by fitting a linear regression in a local neighborhood of the target value. A popular algorithm using the running line smoother is Friedman’s super smoother supsmu, which by default uses cross-validation to find the best span.

rlcv = supsmu(Eaagads$date, Eaagads$days_trading_vwap_price)
rlst = supsmu(Eaagads$date, Eaagads$days_trading_vwap_price, span = 0.05)
rllt = supsmu(Eaagads$date, Eaagads$days_trading_vwap_price, span = 0.75)
 
     plot_ly(x=Eaagads$date, y=Eaagads$days_trading_vwap_price,
             type="scatter", mode="lines", line = list(color=rgb(0.8,0.8,0.8,0.8), width=4),
             name="VWAP- Eaagads")%>%
    add_lines(x=Eaagads$date,y=rllt$y, line=list(dash="solid", width = 1.5, color=NULL),
              name="Span = 0.75")%>%
    add_lines(x=Eaagads$date,y=rlst$y, line=list(dash="solid", width = 1.5, color=NULL),
              name="Span = 0.05")%>%
    add_lines(x=Eaagads$date,y=rlcv$y, line=list(dash="solid", width = 1.5, color=NULL),
              name="Cross-validated span")%>%
    layout(title = "Running line smoothers",
           legend = list(orientation = 'h',
                       xanchor = "center",  # use center of legend as anchor
                     x = 0.5# put legend in center of x-axis
                     ))

Kernel smoothers

An alternative approach to specifying a neighborhood is to decrease weights further away from the target value. These estimates are much smoother than the results from either the running mean (moving average) or running line smoothers.

ks1 = ksmooth(Eaagads$date, Eaagads$days_trading_vwap_price, "normal", 60, x.points=Eaagads$date)
ks2 = ksmooth(Eaagads$date, Eaagads$days_trading_vwap_price, "normal", 30, x.points=Eaagads$date)
 
      plot_ly(x=Eaagads$date, y=Eaagads$days_trading_vwap_price, type="scatter", mode="lines",
              line=list(color=rgb(0.8,0.8,0.8,0.8), width=4),name="VWAP- Eaagads")%>%
      add_lines(x=ks1$x, y=ks1$y, line=list(dash="solid", width = 1.5, color=NULL),
                name="Bandwidth = 60")%>%
      add_lines(x=ks1$x, y=ks2$y, line=list(dash="solid", width = 1.5, color=NULL),
                name="Bandwidth = 30")%>%
      layout( title = "Kernel smoother")

Smoothing splines

Splines consist of a piece-wise polynomial with pieces defined by a sequence of knots where the pieces join smoothly. A smoothing splines is estimated by minimizing a criterion containing a penalty for both goodness of fit, and smoothness. The trade-off between the two is controlled by the smoothing parameter lambda, which is typically chosen by cross-validation.

In the base package, smooth.spline can be used to compute splines, but it is more common to use the GAM function in mgcv. Both functions use cross-validation to choose the default smoothing parameter; but the results vary between implementations.

Another advantage to using GAM is that it allows estimation of confidence intervals.

library(mgcv)
sp.base = smooth.spline(Eaagads$date, Eaagads$days_trading_vwap_price)

sp.cr = gam(Eaagads$days_trading_vwap_price~s(Eaagads$date_numeric, bs="cr"))
sp.gam = gam(Eaagads$days_trading_vwap_price~s(Eaagads$date_numeric))
sp.pred = predict(sp.gam, type="response", se.fit=TRUE)
sp.df = data.frame(x=sp.gam$model[,2], y=sp.pred$fit,
                    lb=as.numeric(sp.pred$fit - (1.96 * sp.pred$se.fit)),
                    ub=as.numeric(sp.pred$fit + (1.96 * sp.pred$se.fit)))
sp.df = sp.df[order(sp.df$x),]

    plot_ly(x=Eaagads$date, y=Eaagads$days_trading_vwap_price, type="scatter", mode="lines",
            line=list(color=rgb(0.8,0.8,0.8,0.8), width=4),name="VWAP- Eaagads")%>%
    add_lines(x=Eaagads$date, y=sp.df$y, name="GAM", line=list(color="#366092", width=2))%>%
    add_ribbons(x=as.Date(sp.df$x, origin="1970-01-01"), ymin=sp.df$lb, ymax=sp.df$ub,
                name="GAM 95% CI", line=list(color="#366092", opacity=0.4, width=0))%>%
    add_lines(x=Eaagads$date, y=predict(sp.base)$y, name="smooth.spline",
              line=list(color="orange", width=2))%>%
    layout(title="Smoothing splines")

Loess

LOESS (Locally Estimated Scatterplot Smoother) combines local regression with kernels by using locally weighted polynomial regression (by default, quadratic regression with tri-cubic weights).It also allows estimation of approximate confidence intervals.

However, it is important to note that unlike supsmu, smooth.spline or gam, loess does not use cross-validation. By default, the span is set to 0.75; that is, the estimated smooth at each target value consists of a local regression constructed using 75% of the data points closest to the target value. This span is fairly large and results in estimated values that are smoother than those from other methods.

ll.rough = loess(Eaagads$days_trading_vwap_price~ Eaagads$date_numeric, span=0.1)
ll.smooth = loess(Eaagads$days_trading_vwap_price~ Eaagads$date_numeric, span=0.75)
 
    plot_ly(x=Eaagads$date, y=Eaagads$days_trading_vwap_price,
               type="scatter", mode="lines", name="VWAP- Eaagads",
               line=list(color=rgb(0.8,0.8,0.8,0.8), width=4))%>%
       add_lines(x=Eaagads$date, y=predict(ll.smooth), name="Span = 0.75",
                 line =list(dash="solid", width = 1.5, color=NULL))%>%
       add_lines(x=Eaagads$date, y=fitted(ll.rough), name="Span = 0.10",
                 line =list(dash="solid", width = 1.5, color=NULL))%>%
      layout(title = "LOESS")
ll.pred = predict(ll.smooth, se = TRUE)


ll.df = data.frame(x=as.Date(ll.smooth$x, origin="1970-01-01"), fit=ll.pred$fit,
lb = ll.pred$fit - (1.96 * ll.pred$se),
ub = ll.pred$fit + (1.96 * ll.pred$se))
ll.df = ll.df[order(ll.df$x),]

 
 
    plot_ly(x=Eaagads$date, y=Eaagads$days_trading_vwap_price,
                 type="scatter", mode="lines", name="VWAP- Eaagads",
            line = list(color=rgb(0.8,0.8,0.8,0.8), width=4))%>%
    add_lines(x=as.Date(Eaagads$date_numeric, origin="1970-01-01"),
              y=ll.df$fit, name="Mean", line=list(color="#366092", width=2))%>%
    add_ribbons(x=as.Date(Eaagads$date_numeric, origin="1970-01-01"),
                ymin=ll.df$lb, ymax=ll.df$ub, name="95% CI", line=list(opacity=0.4, width=0, color="#366092"))%>%
  layout(title = "LOESS with confidence intervals")